Setup

1 Introduction

1.2 R packages

### run in console only ###

if (!require("librarian")){
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(
  assertthat, BiocManager, dplyr, gridExtra, here, mapview,
  prioritizr, prioritizrdata,
  raster, remotes, rgeos, rgdal, scales, sf, sp, stringr,
  units)
if (!require("lpsymphony")){
  BiocManager::install("lpsymphony")
  library(lpsymphony)
}

1.3 Data Setup

dir_data <- here("data/prioritizr")
pu_shp   <- file.path(dir_data, "pu.shp")
pu_url   <- "https://github.com/prioritizr/massey-workshop/raw/main/data.zip"
pu_zip   <- file.path(dir_data, basename(pu_url))
vegetation_tif <- file.path(dir_data, "vegetation.tif")

dir.create(dir_data, showWarnings = F, recursive = T)
if (!file.exists(pu_shp)){
  download.file(pu_url, pu_zip)
  unzip(pu_zip, exdir = dir_data)
  dir_unzip   <- file.path(dir_data, "data")
  files_unzip <- list.files(dir_unzip, full.names = T)
  file.rename(
    files_unzip, 
    files_unzip %>% str_replace("prioritizr/data", "prioritizr"))
  unlink(c(pu_zip, dir_unzip), recursive = T)
}

Data

2.1 Data import

# import planning unit data
pu_data <- as(read_sf(pu_shp), "Spatial")

# format columns in planning unit data
pu_data$locked_in <- as.logical(pu_data$locked_in)
pu_data$locked_out <- as.logical(pu_data$locked_out)

# import vegetation data
veg_data <- stack(vegetation_tif)

2.2 Planning unit data

# print a short summary of the data
print(pu_data)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 5
## names       :   id,             cost, status, locked_in, locked_out 
## min values  :  557, 3.59717531470679,      0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1
# plot the planning unit data
plot(pu_data)

# plot an interactive map of the planning unit data
mapview(pu_data)
# print the structure of object
str(pu_data, max.level = 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  516 obs. of  5 variables:
##   ..@ polygons   :List of 516
##   ..@ plotOrder  : int [1:516] 69 104 1 122 157 190 4 221 17 140 ...
##   ..@ bbox       : num [1:2, 1:2] 348703 5167775 611932 5344516
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
# print the class of the object
class(pu_data)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# print the slots of the object
slotNames(pu_data)
## [1] "data"        "polygons"    "plotOrder"   "bbox"        "proj4string"
# print the coordinate reference system
print(pu_data@proj4string)
## CRS arguments:
##  +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
# print number of planning units (geometries) in the data
tot_pu <- nrow(pu_data)
# print the first six rows in the data
head(pu_data@data)
##    id     cost status locked_in locked_out
## 1 557 29.74225      0     FALSE      FALSE
## 2 558 29.87703      0     FALSE      FALSE
## 3 574 28.60687      0     FALSE      FALSE
## 4 575 30.83416      0     FALSE      FALSE
## 5 576 38.75511      0     FALSE      FALSE
## 6 577 38.11618      2      TRUE      FALSE
# print the first six values in the cost column of the attribute data
head(pu_data$cost)
## [1] 29.74225 29.87703 28.60687 30.83416 38.75511 38.11618
# print the highest cost value
max_cost <- max(pu_data$cost)
# print the smallest cost value
min(pu_data$cost)
## [1] 3.597175
# print average cost value
mean(pu_data$cost)
## [1] 26.87393
# plot a map of the planning unit cost data
spplot(pu_data, "cost")

# plot an interactive map of the planning unit cost data
mapview(pu_data, zcol = "cost")

Questions

  1. How many planning units are in the planning unit data?

Answer: There are 516 planning units in the planning unit data.

  1. What is the highest cost value?

Answer: The highest cost value is 47.24.

  1. Is there a spatial pattern in the planning unit cost values (hint: use plot to make a map)?

Answer: Yes, from the plots you can see that planning unit cost values are significantly lower in the eastern part of southern Tasmania than central or western southern Tasmania.

2.3 Vegetation Data

# print a short summary of the data
print(veg_data)
## class      : RasterStack 
## dimensions : 164, 326, 53464, 32  (nrow, ncol, ncell, nlayers)
## resolution : 967, 1020  (x, y)
## extent     : 298636.7, 613878.7, 5167756, 5335036  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## names      : vegetation.1, vegetation.2, vegetation.3, vegetation.4, vegetation.5, vegetation.6, vegetation.7, vegetation.8, vegetation.9, vegetation.10, vegetation.11, vegetation.12, vegetation.13, vegetation.14, vegetation.15, ... 
## min values :            0,            0,            0,            0,            0,            0,            0,            0,            0,             0,             0,             0,             0,             0,             0, ... 
## max values :            1,            1,            1,            1,            1,            1,            1,            1,            1,             1,             1,             1,             1,             1,             1, ...
# plot a map of the 20th vegetation class
plot(veg_data[[20]])

# plot an interactive map of the 20th vegetation class
mapview(veg_data[[20]])
# print number of rows in the data
nrow(veg_data)
## [1] 164
# print number of columns  in the data
ncol(veg_data)
## [1] 326
# print number of cells in the data
ncell(veg_data)
## [1] 53464
# print number of layers in the data
nlayers(veg_data)
## [1] 32
# print resolution on the x-axis
xres(veg_data)
## [1] 967
# print resolution on the y-axis
yres(veg_data)
## [1] 1020
# print spatial extent of the grid, i.e. coordinates for corners
extent(veg_data)
## class      : Extent 
## xmin       : 298636.7 
## xmax       : 613878.7 
## ymin       : 5167756 
## ymax       : 5335036
# print the coordinate reference system
print(veg_data@crs)
## CRS arguments:
##  +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
# print a summary of the first layer in the stack
print(veg_data[[1]])
## class      : RasterLayer 
## band       : 1  (of  32  bands)
## dimensions : 164, 326, 53464  (nrow, ncol, ncell)
## resolution : 967, 1020  (x, y)
## extent     : 298636.7, 613878.7, 5167756, 5335036  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## source     : vegetation.tif 
## names      : vegetation.1 
## values     : 0, 1  (min, max)
# print the value in the 800th cell in the first layer of the stack
print(veg_data[[1]][800])
##   
## 0
# print the value of the cell located in the 30th row and the 60th column of
# the first layer
print(veg_data[[1]][30, 60])
##   
## 0
# calculate the sum of all the cell values in the first layer
cellStats(veg_data[[1]], "sum")
## [1] 17
# calculate the maximum value of all the cell values in the first layer
cellStats(veg_data[[1]], "max")
## [1] 1
# calculate the minimum value of all the cell values in the first layer
cellStats(veg_data[[1]], "min")
## [1] 0
# calculate the mean value of all the cell values in the first layer
cellStats(veg_data[[1]], "mean")
## [1] 0.00035239

Questions

  1. What part of the study area is the 13th vegetation class found in (hint: make a map)? For instance, is it in the south-eastern part of the study area?

Answer:: The study area of the 13th vegetation class is in the eastern part of Tasmania and is most dense in the north-east.

mapview(veg_data[[13]])
  1. What proportion of cells contain the 12th vegetation class?

Answer: 1.53%

freq_12 <- freq(veg_data[[12]])

prop_12 <- freq_12[2,2] / ncell(veg_data[[12]])
  1. Which vegetation class is the most abundant (i.e. present in the greatest number of cells)?
veg_stats <- cellStats(veg_data, "sum", na.rm = TRUE)

veg_max <- which.max(veg_stats)

Answer: The most abundant vegetation class is 12.

3 Gap Analysis

3.2 Feature Abundance

# create prioritizr problem with only the data
p0 <- problem(pu_data, veg_data, cost_column = "cost")

# print empty problem,
# we can see that only the cost and feature data are defined
print(p0)

# calculate amount of each feature in each planning unit
abundance_data <- feature_abundances(p0)

# print abundance data
print(abundance_data)
## # A tibble: 32 × 3
##    feature       absolute_abundance relative_abundance
##    <chr>                      <dbl>              <dbl>
##  1 vegetation.1                16.0                  1
##  2 vegetation.2                14.3                  1
##  3 vegetation.3                10.4                  1
##  4 vegetation.4                17.8                  1
##  5 vegetation.5                13.0                  1
##  6 vegetation.6                14.3                  1
##  7 vegetation.7                20.0                  1
##  8 vegetation.8                14.0                  1
##  9 vegetation.9                18.0                  1
## 10 vegetation.10               20.0                  1
## # … with 22 more rows
# note that only the first ten rows are printed,
# this is because the abundance_data object is a tibble (i.e. tbl_df) object
# and not a standard data.frame object
print(class(abundance_data))
## [1] "tbl_df"     "tbl"        "data.frame"
# we can print all of the rows in abundance_data like this
print(abundance_data, n = Inf)
## # A tibble: 32 × 3
##    feature       absolute_abundance relative_abundance
##    <chr>                      <dbl>              <dbl>
##  1 vegetation.1                16.0                  1
##  2 vegetation.2                14.3                  1
##  3 vegetation.3                10.4                  1
##  4 vegetation.4                17.8                  1
##  5 vegetation.5                13.0                  1
##  6 vegetation.6                14.3                  1
##  7 vegetation.7                20.0                  1
##  8 vegetation.8                14.0                  1
##  9 vegetation.9                18.0                  1
## 10 vegetation.10               20.0                  1
## 11 vegetation.11               23.6                  1
## 12 vegetation.12              748.                   1
## 13 vegetation.13              126.                   1
## 14 vegetation.14               10.5                  1
## 15 vegetation.15               17.5                  1
## 16 vegetation.16               15.0                  1
## 17 vegetation.17              213.                   1
## 18 vegetation.18               14.3                  1
## 19 vegetation.19               17.1                  1
## 20 vegetation.20               21.4                  1
## 21 vegetation.21               18.6                  1
## 22 vegetation.22              297.                   1
## 23 vegetation.23               20.3                  1
## 24 vegetation.24              165.                   1
## 25 vegetation.25              716.                   1
## 26 vegetation.26               24.0                  1
## 27 vegetation.27               18.8                  1
## 28 vegetation.28               17.5                  1
## 29 vegetation.29               24.7                  1
## 30 vegetation.30               59.0                  1
## 31 vegetation.31               60.0                  1
## 32 vegetation.32               32                    1
# add new column with feature abundances in km^2
abundance_data$absolute_abundance_km2 <-
  (abundance_data$absolute_abundance * prod(res(veg_data))) %>%
  set_units(m^2) %>%
  set_units(km^2)

# print abundance data
print(abundance_data)
## # A tibble: 32 × 4
##    feature       absolute_abundance relative_abundance absolute_abundance_km2
##    <chr>                      <dbl>              <dbl>                 [km^2]
##  1 vegetation.1                16.0                  1                   15.8
##  2 vegetation.2                14.3                  1                   14.1
##  3 vegetation.3                10.4                  1                   10.2
##  4 vegetation.4                17.8                  1                   17.6
##  5 vegetation.5                13.0                  1                   12.8
##  6 vegetation.6                14.3                  1                   14.1
##  7 vegetation.7                20.0                  1                   19.7
##  8 vegetation.8                14.0                  1                   13.9
##  9 vegetation.9                18.0                  1                   17.8
## 10 vegetation.10               20.0                  1                   19.7
## # … with 22 more rows
# add new column with feature abundances in km^2
abundance_data$absolute_abundance_km2 <-
  (abundance_data$absolute_abundance * prod(res(veg_data))) %>%
  set_units(m^2) %>%
  set_units(km^2)

# print abundance data
print(abundance_data)
## # A tibble: 32 × 4
##    feature       absolute_abundance relative_abundance absolute_abundance_km2
##    <chr>                      <dbl>              <dbl>                 [km^2]
##  1 vegetation.1                16.0                  1                   15.8
##  2 vegetation.2                14.3                  1                   14.1
##  3 vegetation.3                10.4                  1                   10.2
##  4 vegetation.4                17.8                  1                   17.6
##  5 vegetation.5                13.0                  1                   12.8
##  6 vegetation.6                14.3                  1                   14.1
##  7 vegetation.7                20.0                  1                   19.7
##  8 vegetation.8                14.0                  1                   13.9
##  9 vegetation.9                18.0                  1                   17.8
## 10 vegetation.10               20.0                  1                   19.7
## # … with 22 more rows
# calculate the average abundance of the features
mean(abundance_data$absolute_abundance_km2)
## 86.82948 [km^2]
# plot histogram of the features' abundances
hist(abundance_data$absolute_abundance_km2, main = "Feature abundances")

# find the abundance of the feature with the largest abundance
max(abundance_data$absolute_abundance_km2)
## 737.982 [km^2]
# find the name of the feature with the largest abundance
abundance_data$feature[which.max(abundance_data$absolute_abundance_km2)]
## [1] "vegetation.12"

Questions

  1. What is the median abundance of the features (hint: median)?
median_feat <- median(abundance_data$absolute_abundance_km2)

Answer: The median abundance of the features is 19.12.

  1. What is the name of the feature with smallest abundance?
feat_sm <- abundance_data$feature[which.min(abundance_data$absolute_abundance_km2)]

Answer: The name of the feature with the smallest abundance is feat_sm.

  1. How many features have a total abundance greater than 100 km^2 (hint: use sum(abundance_data$absolute_abundance_km2 > set_units(threshold, km^2) with the correct threshold value)?

Answer: Six features have a total abundance greater than 100 \(km^2\).

abundance_data %>% filter(absolute_abundance_km2 > set_units(100, km^2))
## # A tibble: 6 × 4
##   feature       absolute_abundance relative_abundance absolute_abundance_km2
##   <chr>                      <dbl>              <dbl>                 [km^2]
## 1 vegetation.12               748.                  1                   738.
## 2 vegetation.13               126.                  1                   124.
## 3 vegetation.17               213.                  1                   210.
## 4 vegetation.22               297.                  1                   293.
## 5 vegetation.24               165.                  1                   162.
## 6 vegetation.25               716.                  1                   706.

3.3 Feature representation

# create column in planning unit data with binary values (zeros and ones)
# indicating if a planning unit is covered by protected areas or not
pu_data$pa_status <- as.numeric(pu_data$locked_in)

# calculate feature representation by protected areas
repr_data <- eval_feature_representation_summary(p0, pu_data[, "pa_status"])

# print feature representation data
print(repr_data)
## # A tibble: 32 × 5
##    summary feature       total_amount absolute_held relative_held
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>
##  1 overall vegetation.1          16.0         0            0     
##  2 overall vegetation.2          14.3         0            0     
##  3 overall vegetation.3          10.4         0            0     
##  4 overall vegetation.4          17.8         0            0     
##  5 overall vegetation.5          13.0         0            0     
##  6 overall vegetation.6          14.3         0            0     
##  7 overall vegetation.7          20.0         0            0     
##  8 overall vegetation.8          14.0         0            0     
##  9 overall vegetation.9          18.0         0.846        0.0470
## 10 overall vegetation.10         20.0         0            0     
## # … with 22 more rows

Questions

  1. What is the average proportion of the features held in protected areas (hint: use mean(table$relative_held) with the correct table name)?
mean_pa <- mean(repr_data$relative_held)

Answer: The average proportion of the features held in protected areas is 0.24.

  1. If we set a target of 10% coverage by protected areas, how many features fail to meet this target (hint: use sum(table$relative_held < target_value) with the correct table name)?
target_value_10 = 0.10

pa_10 <- sum(repr_data$relative_held >= target_value_10) 

Answer: 15 features fail to meet this target.

  1. If we set a target of 20% coverage by protected areas, how many features fail to meet this target?
target_value_20 = 0.20

pa_20 <- sum(repr_data$relative_held >= target_value_20)

Answer: 14 features fail to meet this target.

  1. Is there a relationship between the total abundance of a feature and how well it is represented by protected areas (hint: plot(abundance_data\(absolute_abundance ~ repr_data\)relative_held))?
plot(abundance_data$absolute_abundance ~ repr_data$relative_held)

Answer: There doesn’t seem to be a relationship between the total abundance of a feature and how well it is represented by protected areas.

4 Spatial prioritizations

4.2 Starting out simple

# print planning unit data
print(pu_data)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 6
## names       :   id,             cost, status, locked_in, locked_out, pa_status 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1
# make prioritization problem
p1_rds <- file.path(dir_data, "p1.rds")
if (!file.exists(p1_rds)){
  p1 <- problem(pu_data, veg_data, cost_column = "cost") %>%
        add_min_set_objective() %>%
        add_relative_targets(0.05) %>% # 5% representation targets
        add_binary_decisions() %>%
        add_lpsymphony_solver()
  saveRDS(p1, p1_rds)
}
p1 <- readRDS(p1_rds)

# print problem
print(p1)

# solve problem
s1 <- solve(p1)

# print solution, the solution_1 column contains the solution values
# indicating if a planning unit is (1) selected or (0) not
print(s1)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1
# calculate number of planning units selected in the prioritization
pu_selected <- eval_n_summary(p1, s1[, "solution_1"])
# calculate total cost of the prioritization
eval_cost_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
##   summary  cost
##   <chr>   <dbl>
## 1 overall  385.
# plot solution
# selected = green, not selected = grey
spplot(s1, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s1",
       colorkey = FALSE)

Questions

  1. How many planing units were selected in the prioritization? What proportion of planning units were selected in the prioritization?
pu_prop_selected <- as.numeric(pu_selected) / length(s1$id)

15 / length(s1$id)
## [1] 0.02906977

Answer: There were NA, 0.03 planning units selected in the prioritization.

  1. Is there a pattern in the spatial distribution of the priority areas?

Answer: I would say there isn’t a pattern in the spatial distribution of the priority areas since they are fairly spread out across all of the study area.

  1. Can you verify that all of the targets were met in the prioritization (hint: eval_feature_representation_summary(p1, s1[, “solution_1”]))?

Answer: Yes you can verify that the 5% target were met in the prioritization since all relative_held values are larger than 5%.

eval_feature_representation_summary(p1, s1[, "solution_1"])
## # A tibble: 32 × 5
##    summary feature       total_amount absolute_held relative_held
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>
##  1 overall vegetation.1          16.0          2.90        0.181 
##  2 overall vegetation.2          14.3          1           0.0699
##  3 overall vegetation.3          10.4          1           0.0963
##  4 overall vegetation.4          17.8          3           0.168 
##  5 overall vegetation.5          13.0          1           0.0769
##  6 overall vegetation.6          14.3          1.94        0.136 
##  7 overall vegetation.7          20.0          1.75        0.0875
##  8 overall vegetation.8          14.0          2.80        0.200 
##  9 overall vegetation.9          18.0          2.16        0.120 
## 10 overall vegetation.10         20.0          2           0.0999
## # … with 22 more rows

4.3 Adding complexity

# plot locked_in data
# TRUE = blue, FALSE = grey
spplot(pu_data, "locked_in", col.regions = c("grey80", "darkblue"),
       main = "locked_in", colorkey = FALSE)

# make prioritization problem
p2_rds <- file.path(dir_data, "p2.rds")
if (!file.exists(p2_rds)){
  p2 <- problem(pu_data, veg_data, cost_column = "cost") %>%
      add_min_set_objective() %>%
      add_relative_targets(0.05) %>%
      add_locked_in_constraints("locked_in") %>%
      add_binary_decisions() %>%
      add_lpsymphony_solver()
  saveRDS(p2, p2_rds)
}
p2 <- readRDS(p2_rds)

# print problem
print(p2)

# solve problem
s2 <- solve(p2)

# plot solution
# selected = green, not selected = grey
spplot(s2, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s2",
       colorkey = FALSE)

# make prioritization problem
p3_rds <- file.path(dir_data, "p3.rds")
if (!file.exists(p3_rds)){
  p3 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p3, p3_rds)
}
p3 <- readRDS(p3_rds)

# print problem
print(p3)

# solve problem
s3 <- solve(p3)

# plot solution
# selected = green, not selected = grey
spplot(s3, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s3",
       colorkey = FALSE)

# plot locked_out data
# TRUE = red, FALSE = grey
spplot(pu_data, "locked_out", col.regions = c("grey80", "darkred"),
       main = "locked_out", colorkey = FALSE)

# make prioritization problem
p4_rds <- file.path(dir_data, "p4.rds")
if (!file.exists(p4_rds)){
  p4 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p4, p4_rds)
}
p4 <- readRDS(p4_rds)

# print problem
print(p4)

# solve problem
s4 <- solve(p4)

# plot solution
# selected = green, not selected = grey
spplot(s4, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s4",
       colorkey = FALSE)

Questions

  1. What is the cost of the planning units selected in s2, s3, and s4?
s2_cost <- as.numeric(eval_cost_summary(p2, s2[, "solution_1"]))

s3_cost <- as.numeric(eval_cost_summary(p3, s3[, "solution_1"]))

s4_cost <- as.numeric(eval_cost_summary(p4, s4[, "solution_1"]))

Answer: The cost of the planning units in s2 is NA, 6600.09. The cost of the planning units in s3 is NA, 6669.91. The cost of the planning units in s4 in NA, 6711.58.

  1. How many planning units are in s2, s3, and s4?
pu_s2 <- eval_n_summary(p2, s2[, "solution_1"])

pu_s3 <- eval_n_summary(p3, s3[, "solution_1"])

pu_s4 <- eval_n_summary(p4, s4[, "solution_1"])

Answer: There are overall, 205 planning units in s2. There are overall, 211 planning units in s3. There are overall, 212 planning units in s4.

  1. Do the solutions with more planning units have a greater cost? Why (or why not)?

Answers: The solutions with more planning units have a greater cost because there are more planning units.

  1. Why does the first solution (s1) cost less than the second solution with protected areas locked into the solution (s2)?

Answers: s1 costs less than s2 because s2 includes the locked_in or protected areas in addition to the prioritization planning units. This is an additional constraint to the prioritization, and so this limits the solution and it might have to pick more expensive planning units.

  1. Why does the third solution (s3) cost less than the fourth solution solution with highly degraded areas locked out (s4)?

I assumed that some planning units had higher costs than others so the solution with fewer constraints could pick the cheapest planning units. When more constraints are added, the solution might have to pick more expensive planning units.

Answers: s4 has more constraints than s3 and so this means the solution probably has to pick more expensive planning units compared to s3, which had fewer constraints. This hints that the highly degraded areas were of lower cost since those areas were included in s3.

4.4 Penalizing fragmentation

# make prioritization problem
p5_rds <- file.path(dir_data, "p5.rds")
if (!file.exists(p5_rds)){
  p5 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.001) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p5, p5_rds)
}
p5 <- readRDS(p5_rds)

# print problem
print(p5)

# solve problem,
# note this will take a bit longer than the previous runs
s5 <- solve(p5)

# print solution
print(s5)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1
# plot solution
# selected = green, not selected = grey
spplot(s5, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s5",
       colorkey = FALSE)

Questions

  1. What is the cost of the fourth (s4) and fifth (s5) solutions? Why does the fifth solution (s5) cost more than the fourth (s4) solution?
s4_cost <- as.numeric(eval_cost_summary(p4, s4[, "solution_1"]))

s5_cost <- as.numeric(eval_cost_summary(p5, s5[, "solution_1"]))

Answer: The cost of the s4 soltuion is NA, 6711.58 and the cost of the s5 solution is NA, 6747.59 We are including a boundary penalty and so this clumps the planning units together which are more expensive than the planning units that would have been chosen without the boundary penalty.

  1. Try setting the penalty value to 0.000000001 (i.e. 1e-9) instead of 0.001. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this is a useful penalty value? Why (or why not)?
# make prioritization problem with a penalty value of 1e-9
p6_rds <- file.path(dir_data, "p6.rds")
if (!file.exists(p6_rds)){
  p6 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.000000001) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p6, p6_rds)
}
p6 <- readRDS(p6_rds)

# solve problem,
# note this will take a bit longer than the previous runs
s6 <- solve(p6)
# plot solution
# selected = green, not selected = grey
spplot(s6, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s6",
       colorkey = FALSE)

s6_cost <- as.numeric(eval_cost_summary(p6, s6[, "solution_1"]))

Answer: Those cost of s6 is NA, 6711.76, which is slightly higher than s4. s6 and s4 look pretty identical with some slight differences and so because of this, this is not a useful penalty as it doesn’t do much.

  1. Try setting the penalty value to 0.5. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this a useful penalty value? Why (or why not)?
# make prioritization problem with a penalty value of 0.5
p7_rds <- file.path(dir_data, "p7.rds")
if (!file.exists(p7_rds)){
  p7 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.5) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p7, p7_rds)
}
p7 <- readRDS(p7_rds)

# solve problem,
# note this will take a bit longer than the previous runs
s7 <- solve(p7)
# plot solution
# selected = green, not selected = grey
spplot(s7, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s7",
       colorkey = FALSE)

s7_cost <- as.numeric(eval_cost_summary(p7, s7[, "solution_1"]))

Answer: Those cost of s7 is NA, 9816.64, and is much more expensive than s4. s7 and s4 look very different from each other, as s7 looks like one large clump. This is also not a useful penalty because it is including extra planning units we do not need since the minimizing fragmentation is being considered more than the solution cost in this solution. Additionally, the high cost is also not useful.